Demo

library("tidyverse")
library("palmerpenguins")
library("janitor")
# Preview the raw data: one line per column with its type and first values.
penguins_raw |> glimpse()
## Rows: 344
## Columns: 17
## $ studyName             <chr> "PAL0708", "PAL0708", "PAL0708", "PAL0708", "PAL…
## $ `Sample Number`       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ Species               <chr> "Adelie Penguin (Pygoscelis adeliae)", "Adelie P…
## $ Region                <chr> "Anvers", "Anvers", "Anvers", "Anvers", "Anvers"…
## $ Island                <chr> "Torgersen", "Torgersen", "Torgersen", "Torgerse…
## $ Stage                 <chr> "Adult, 1 Egg Stage", "Adult, 1 Egg Stage", "Adu…
## $ `Individual ID`       <chr> "N1A1", "N1A2", "N2A1", "N2A2", "N3A1", "N3A2", …
## $ `Clutch Completion`   <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "No", …
## $ `Date Egg`            <date> 2007-11-11, 2007-11-11, 2007-11-16, 2007-11-16,…
## $ `Culmen Length (mm)`  <dbl> 39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34…
## $ `Culmen Depth (mm)`   <dbl> 18.7, 17.4, 18.0, NA, 19.3, 20.6, 17.8, 19.6, 18…
## $ `Flipper Length (mm)` <dbl> 181, 186, 195, NA, 193, 190, 181, 195, 193, 190,…
## $ `Body Mass (g)`       <dbl> 3750, 3800, 3250, NA, 3450, 3650, 3625, 4675, 34…
## $ Sex                   <chr> "MALE", "FEMALE", "FEMALE", NA, "FEMALE", "MALE"…
## $ `Delta 15 N (o/oo)`   <dbl> NA, 8.94956, 8.36821, NA, 8.76651, 8.66496, 9.18…
## $ `Delta 13 C (o/oo)`   <dbl> NA, -24.69454, -25.33302, NA, -25.32426, -25.298…
## $ Comments              <chr> "Not enough blood for isotopes.", NA, NA, "Adult…
# Keep a copy of the original column names, then build `penguins` from the raw
# data with its column names cleaned by janitor.
old_names <- colnames(penguins_raw)
penguins <- janitor::clean_names(penguins_raw)

# Show the cleaned column names.
new_names <- colnames(penguins)
new_names
##  [1] "study_name"        "sample_number"     "species"          
##  [4] "region"            "island"            "stage"            
##  [7] "individual_id"     "clutch_completion" "date_egg"         
## [10] "culmen_length_mm"  "culmen_depth_mm"   "flipper_length_mm"
## [13] "body_mass_g"       "sex"               "delta_15_n_o_oo"  
## [16] "delta_13_c_o_oo"   "comments"
library("ggplot2")
# Reduce `species` to its first word, e.g.
# "Adelie Penguin (Pygoscelis adeliae)" becomes "Adelie".
penguins <- penguins |>
  mutate(species = stringr::word(species, start = 1, end = 1))

# Scatter plot of flipper length against body mass, coloured by species.
ggplot(
  data = penguins,
  mapping = aes(
    x = body_mass_g,
    y = flipper_length_mm,
    colour = species
  )
) +
  geom_point() +
  # tidy up the axis and legend titles
  labs(
    x = "Body mass (g)",
    y = "Flipper length (mm)",
    colour = "Species"
  )
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

# To save the plot, run: ggsave(filename = "myfirstggplot.png")

Exercises

  1. Generate a scatter plot for another pair of (numeric) variables
# Scatter plot of culmen depth against culmen length, coloured by species.
# The plot is stored in `plt` so that further layers can be added to it later.
plt <- penguins |>
  ggplot() +
  aes(x = culmen_length_mm,
      y = culmen_depth_mm,
      colour = species) +
  geom_point() +
  # axis and legend titles ("Culmen" was previously misspelled on the y axis)
  labs(x = "Culmen Length (mm)",
       y = "Culmen Depth (mm)",
       colour = "Species")
plt
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

  1. Colour by sex and use facet_wrap() to generate a plot for each species and island combination.
# Scatter plot of culmen depth against culmen length, coloured by sex.
plt2 <- penguins |>
  ggplot() +
  aes(x = culmen_length_mm,
      y = culmen_depth_mm,
      colour = sex) +
  geom_point() +
  # axis and legend titles; the legend title is capitalised to match the
  # "Species" legend used by the other plots
  labs(x = "Culmen Length (mm)",
       y = "Culmen Depth (mm)",
       colour = "Sex")

# One panel per species and island combination.
plt2 + facet_wrap(vars(species, island))
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

  1. Try including a line of best fit by adding another geometry layer, geom_smooth(method = "lm").
# Add a linear-model ("lm") smoothing layer on top of the existing scatter plot.
plt +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

  1. Use a different geometry, geom_histogram(), to create a histogram for flipper length, coloured by species.
# Histogram of flipper length with the bars filled by species.
plt3 <- ggplot(
  data = penguins,
  mapping = aes(x = flipper_length_mm, fill = species)
) +
  geom_histogram() +
  labs(
    x = "Flipper Length (mm)",
    fill = "Species"
  )

plt3
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_bin()`).

Advanced

# install.packages("plotly")
library("plotly")
# Build the body mass / flipper length scatter plot (coloured by species) and
# keep it in `myplot` so it can be handed to plotly.
myplot <- ggplot(
  data = penguins,
  mapping = aes(
    x = body_mass_g,
    y = flipper_length_mm,
    colour = species
  )
) +
  geom_point() +
  # tidy up the axis and legend titles
  labs(
    x = "Body mass (g)",
    y = "Flipper length (mm)",
    colour = "Species"
  )

# Convert the ggplot object with plotly.
plotly::ggplotly(myplot)